Data Visualization Mat no - 12200983 email - Anumula Naga Raviteja

Loading Packages

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggplot2)
library(dplyr)
library(forcats)
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(viridis)
## Loading required package: viridisLite
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(usmap)
library(tm)
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:viridis':
## 
##     unemp
## The following object is masked from 'package:purrr':
## 
##     map

Exploratory Data Analysis

  1. Importing the three vector-borne disease datasets:
# Load the three disease extracts; each file is named after the SNOMED code
# of the condition it contains.
DengueDF <- read.csv(file = "US.38362002.csv")    # dengue
MalariaDF <- read.csv(file = "US.61462000.csv")   # malaria
WestNileDF <- read.csv(file = "US.430397002.csv") # West Nile virus

Head of the datasets:

# Preview the first six dengue records.
head(x = DengueDF, n = 6L)
##   ConditionName ConditionSNOMED PathogenName PathogenTaxonID Fatalities
## 1        Dengue        38362002 Dengue virus           12637          0
## 2        Dengue        38362002 Dengue virus           12637          0
## 3        Dengue        38362002 Dengue virus           12637          0
## 4        Dengue        38362002 Dengue virus           12637          0
## 5        Dengue        38362002 Dengue virus           12637          0
## 6        Dengue        38362002 Dengue virus           12637          0
##                CountryName CountryISO           Admin1Name Admin1ISO Admin2Name
## 1 UNITED STATES OF AMERICA         US            WISCONSIN     US-WI       <NA>
## 2 UNITED STATES OF AMERICA         US             NEW YORK     US-NY       <NA>
## 3 UNITED STATES OF AMERICA         US DISTRICT OF COLUMBIA     US-DC       <NA>
## 4 UNITED STATES OF AMERICA         US              GEORGIA     US-GA       <NA>
## 5 UNITED STATES OF AMERICA         US             COLORADO     US-CO       <NA>
## 6 UNITED STATES OF AMERICA         US            TENNESSEE     US-TN       <NA>
##   CityName PeriodStartDate PeriodEndDate PartOfCumulativeCountSeries AgeRange
## 1     <NA>      2017-07-09    2017-07-15                           0    0-130
## 2     <NA>      2017-07-30    2017-08-05                           0    0-130
## 3     <NA>      2015-02-08    2015-02-14                           0    0-130
## 4     <NA>      2016-08-28    2016-09-03                           0    0-130
## 5     <NA>      2016-09-04    2016-09-10                           0    0-130
## 6     <NA>      2016-08-28    2016-09-03                           0    0-130
##    Subpopulation PlaceOfAcquisition DiagnosisCertainty
## 1 None specified                 NA                 NA
## 2 None specified                 NA                 NA
## 3 None specified                 NA                 NA
## 4 None specified                 NA                 NA
## 5 None specified                 NA                 NA
## 6 None specified                 NA                 NA
##                                             SourceName CountValue
## 1 US Nationally Notifiable Disease Surveillance System          1
## 2 US Nationally Notifiable Disease Surveillance System          1
## 3 US Nationally Notifiable Disease Surveillance System          1
## 4 US Nationally Notifiable Disease Surveillance System          1
## 5 US Nationally Notifiable Disease Surveillance System          1
## 6 US Nationally Notifiable Disease Surveillance System          1
# Preview the first six malaria records.
head(x = MalariaDF, n = 6L)
##   ConditionName ConditionSNOMED PathogenName PathogenTaxonID Fatalities
## 1       Malaria        61462000   Plasmodium            5820          0
## 2       Malaria        61462000   Plasmodium            5820          0
## 3       Malaria        61462000   Plasmodium            5820          0
## 4       Malaria        61462000   Plasmodium            5820          0
## 5       Malaria        61462000   Plasmodium            5820          0
## 6       Malaria        61462000   Plasmodium            5820          0
##                CountryName CountryISO Admin1Name Admin1ISO Admin2Name CityName
## 1 UNITED STATES OF AMERICA         US  WISCONSIN     US-WI       <NA>     <NA>
## 2 UNITED STATES OF AMERICA         US  WISCONSIN     US-WI       <NA>     <NA>
## 3 UNITED STATES OF AMERICA         US  WISCONSIN     US-WI       <NA>     <NA>
## 4 UNITED STATES OF AMERICA         US  WISCONSIN     US-WI       <NA>     <NA>
## 5 UNITED STATES OF AMERICA         US  WISCONSIN     US-WI       <NA>     <NA>
## 6 UNITED STATES OF AMERICA         US  WISCONSIN     US-WI       <NA>     <NA>
##   PeriodStartDate PeriodEndDate PartOfCumulativeCountSeries AgeRange
## 1      1954-08-29    1954-09-04                           0    0-130
## 2      1952-01-13    1952-01-19                           0    0-130
## 3      1952-01-20    1952-01-26                           0    0-130
## 4      1952-01-27    1952-02-02                           0    0-130
## 5      1952-02-03    1952-02-09                           0    0-130
## 6      1952-02-10    1952-02-16                           0    0-130
##    Subpopulation PlaceOfAcquisition DiagnosisCertainty
## 1       Civilian                 NA                 NA
## 2 None specified                 NA                 NA
## 3 None specified                 NA                 NA
## 4 None specified                 NA                 NA
## 5 None specified                 NA                 NA
## 6 None specified                 NA                 NA
##                                             SourceName CountValue
## 1 US Nationally Notifiable Disease Surveillance System          1
## 2 US Nationally Notifiable Disease Surveillance System          2
## 3 US Nationally Notifiable Disease Surveillance System          2
## 4 US Nationally Notifiable Disease Surveillance System          1
## 5 US Nationally Notifiable Disease Surveillance System          2
## 6 US Nationally Notifiable Disease Surveillance System          2
# Preview the first six West Nile virus records.
head(x = WestNileDF, n = 6L)
##                                          ConditionName ConditionSNOMED
## 1 Disorder of nervous system caused by West Nile virus       430397002
## 2 Disorder of nervous system caused by West Nile virus       430397002
## 3 Disorder of nervous system caused by West Nile virus       430397002
## 4 Disorder of nervous system caused by West Nile virus       430397002
## 5 Disorder of nervous system caused by West Nile virus       430397002
## 6 Disorder of nervous system caused by West Nile virus       430397002
##      PathogenName PathogenTaxonID Fatalities              CountryName
## 1 West Nile virus           11082          0 UNITED STATES OF AMERICA
## 2 West Nile virus           11082          0 UNITED STATES OF AMERICA
## 3 West Nile virus           11082          0 UNITED STATES OF AMERICA
## 4 West Nile virus           11082          0 UNITED STATES OF AMERICA
## 5 West Nile virus           11082          0 UNITED STATES OF AMERICA
## 6 West Nile virus           11082          0 UNITED STATES OF AMERICA
##   CountryISO Admin1Name Admin1ISO Admin2Name CityName PeriodStartDate
## 1         US       OHIO     US-OH         NA       NA      2010-04-04
## 2         US       OHIO     US-OH         NA       NA      2011-09-04
## 3         US       OHIO     US-OH         NA       NA      2012-08-05
## 4         US       OHIO     US-OH         NA       NA      2013-09-22
## 5         US       OHIO     US-OH         NA       NA      2015-08-16
## 6         US   MICHIGAN     US-MI         NA       NA      2011-09-18
##   PeriodEndDate PartOfCumulativeCountSeries AgeRange  Subpopulation
## 1    2010-04-10                           0    0-130 None specified
## 2    2011-09-10                           0    0-130 None specified
## 3    2012-08-11                           0    0-130 None specified
## 4    2013-09-28                           0    0-130 None specified
## 5    2015-08-22                           0    0-130 None specified
## 6    2011-09-24                           0    0-130 None specified
##   PlaceOfAcquisition DiagnosisCertainty
## 1                 NA                 NA
## 2                 NA                 NA
## 3                 NA                 NA
## 4                 NA                 NA
## 5                 NA                 NA
## 6                 NA                 NA
##                                             SourceName CountValue
## 1 US Nationally Notifiable Disease Surveillance System          3
## 2 US Nationally Notifiable Disease Surveillance System          1
## 3 US Nationally Notifiable Disease Surveillance System          2
## 4 US Nationally Notifiable Disease Surveillance System          1
## 5 US Nationally Notifiable Disease Surveillance System          1
## 6 US Nationally Notifiable Disease Surveillance System          1

Structure of the datasets: Because all three datasets come from Tycho, they share the same structure, so it is enough to inspect one of them.

# Column names and types of the raw dengue table.
str(object = DengueDF)
## 'data.frame':    4272 obs. of  20 variables:
##  $ ConditionName              : chr  "Dengue" "Dengue" "Dengue" "Dengue" ...
##  $ ConditionSNOMED            : int  38362002 38362002 38362002 38362002 38362002 38362002 38362002 38362002 38362002 38362002 ...
##  $ PathogenName               : chr  "Dengue virus" "Dengue virus" "Dengue virus" "Dengue virus" ...
##  $ PathogenTaxonID            : int  12637 12637 12637 12637 12637 12637 12637 12637 12637 12637 ...
##  $ Fatalities                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CountryName                : chr  "UNITED STATES OF AMERICA" "UNITED STATES OF AMERICA" "UNITED STATES OF AMERICA" "UNITED STATES OF AMERICA" ...
##  $ CountryISO                 : chr  "US" "US" "US" "US" ...
##  $ Admin1Name                 : chr  "WISCONSIN" "NEW YORK" "DISTRICT OF COLUMBIA" "GEORGIA" ...
##  $ Admin1ISO                  : chr  "US-WI" "US-NY" "US-DC" "US-GA" ...
##  $ Admin2Name                 : chr  NA NA NA NA ...
##  $ CityName                   : chr  NA NA NA NA ...
##  $ PeriodStartDate            : chr  "2017-07-09" "2017-07-30" "2015-02-08" "2016-08-28" ...
##  $ PeriodEndDate              : chr  "2017-07-15" "2017-08-05" "2015-02-14" "2016-09-03" ...
##  $ PartOfCumulativeCountSeries: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ AgeRange                   : chr  "0-130" "0-130" "0-130" "0-130" ...
##  $ Subpopulation              : chr  "None specified" "None specified" "None specified" "None specified" ...
##  $ PlaceOfAcquisition         : logi  NA NA NA NA NA NA ...
##  $ DiagnosisCertainty         : logi  NA NA NA NA NA NA ...
##  $ SourceName                 : chr  "US Nationally Notifiable Disease Surveillance System" "US Nationally Notifiable Disease Surveillance System" "US Nationally Notifiable Disease Surveillance System" "US Nationally Notifiable Disease Surveillance System" ...
##  $ CountValue                 : int  1 1 1 1 1 1 1 1 1 1 ...

Importing USA flood data and population data:

# Flood events, read with readr so the result is a tibble.
FloodData <- read_csv(file = "USAFloodReport.csv")
## Rows: 46 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): State, GlideNumber, Country, OtherCountry, Validation, MainCause
## dbl (7): long, lat, Area, Began, Dead, Displaced, Severity
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Population per state, one column per year (wide format).
populationDF <- read_csv(file = "PopulationUS.csv")
## Rows: 52 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): States
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Column names and types of the raw flood table.
str(object = FloodData)
## spec_tbl_df [46 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ State       : chr [1:46] "ILLINOIS" "NORTH DAKOTA" "TEXAS" "IOWA" ...
##  $ GlideNumber : chr [1:46] NA NA NA NA ...
##  $ Country     : chr [1:46] "USA" "USA" "USA" "USA" ...
##  $ OtherCountry: chr [1:46] NA "Canada" NA NA ...
##  $ long        : num [1:46] -88.6 -97.6 -98.9 -93.4 -91.6 ...
##  $ lat         : num [1:46] 38.8 48.6 28.9 41.9 37.1 ...
##  $ Area        : num [1:46] 493137 43181 33908 123500 170079 ...
##  $ Began       : num [1:46] 2013 2013 2013 2013 2013 ...
##  $ Validation  : chr [1:46] "News" "News" "News" "News" ...
##  $ Dead        : num [1:46] 5 0 3 0 5 2 10 4 5 0 ...
##  $ Displaced   : num [1:46] 300 0 300 400 0 100 11000 4800 0 200 ...
##  $ MainCause   : chr [1:46] "Heavy Rain" "Heavy Rain and Snowmelt" "Heavy Rain" "Heavy Rain" ...
##  $ Severity    : num [1:46] 1.5 1 2 1 1.5 1.5 2 2 1 1.5 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   State = col_character(),
##   ..   GlideNumber = col_character(),
##   ..   Country = col_character(),
##   ..   OtherCountry = col_character(),
##   ..   long = col_double(),
##   ..   lat = col_double(),
##   ..   Area = col_double(),
##   ..   Began = col_double(),
##   ..   Validation = col_character(),
##   ..   Dead = col_double(),
##   ..   Displaced = col_double(),
##   ..   MainCause = col_character(),
##   ..   Severity = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Column names and types of the raw population table.
str(object = populationDF)
## spec_tbl_df [52 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ States        : chr [1:52] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ Census        : num [1:52] 4779736 710231 6392017 2915918 37253956 ...
##  $ Estimates Base: num [1:52] 4780125 710249 6392288 2916031 37254519 ...
##  $ 2010          : num [1:52] 4785437 713910 6407172 2921964 37319502 ...
##  $ 2011          : num [1:52] 4799069 722128 6472643 2940667 37638369 ...
##  $ 2012          : num [1:52] 4815588 730443 6554978 2952164 37948800 ...
##  $ 2013          : num [1:52] 4830081 737068 6632764 2959400 38260787 ...
##  $ 2014          : num [1:52] 4841799 736283 6730413 2967392 38596972 ...
##  $ 2015          : num [1:52] 4852347 737498 6829676 2978048 38918045 ...
##  $ 2016          : num [1:52] 4863525 741456 6941072 2989918 39167117 ...
##  $ 2017          : num [1:52] 4874486 739700 7044008 3001345 39358497 ...
##  $ 2018          : num [1:52] 4887681 735139 7158024 3009733 39461588 ...
##  $ 2019          : num [1:52] 4903185 731545 7278717 3017804 39512223 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   States = col_character(),
##   ..   Census = col_number(),
##   ..   `Estimates Base` = col_number(),
##   ..   `2010` = col_number(),
##   ..   `2011` = col_number(),
##   ..   `2012` = col_number(),
##   ..   `2013` = col_number(),
##   ..   `2014` = col_number(),
##   ..   `2015` = col_number(),
##   ..   `2016` = col_number(),
##   ..   `2017` = col_number(),
##   ..   `2018` = col_number(),
##   ..   `2019` = col_number()
##   .. )
##  - attr(*, "problems")=<externalptr>
  2. Cleaning and Processing the Data:

For Dengue Data set

# Reduce the raw dengue records to yearly case totals per state, 2013-2017.
DengueDF <- local({
  # Only the state name, state ISO code, period start and case count are used.
  weekly <- DengueDF[c("Admin1Name", "Admin1ISO", "PeriodStartDate", "CountValue")]

  # The first four characters of the start date are the year.
  weekly$PeriodStartDate <- strtrim(weekly$PeriodStartDate, 4)
  # "US-WI" -> "WI"
  weekly$Admin1ISO <- gsub("US-", "", as.character(weekly$Admin1ISO))

  # Years are compared as text; then rows with any missing value are dropped.
  in_window <- weekly$PeriodStartDate >= "2013" &
    weekly$PeriodStartDate <= "2017"
  weekly <- drop_na(weekly[in_window, ])

  # Sum the counts within each state and year.
  yearly <- aggregate(
    weekly$CountValue,
    by = list(weekly$Admin1Name, weekly$Admin1ISO, weekly$PeriodStartDate),
    FUN = sum
  )
  setNames(yearly, c("StateName", "state", "Year", "Cases"))
})

For Malaria Data set

# Reduce the raw malaria records to yearly case totals per state, 2013-2017.
MalariaDF <- local({
  # Only the state name, state ISO code, period start and case count are used.
  weekly <- MalariaDF[c("Admin1Name", "Admin1ISO", "PeriodStartDate", "CountValue")]

  # The first four characters of the start date are the year.
  weekly$PeriodStartDate <- strtrim(weekly$PeriodStartDate, 4)
  # "US-WI" -> "WI"
  weekly$Admin1ISO <- gsub("US-", "", as.character(weekly$Admin1ISO))

  # Years are compared as text; then rows with any missing value are dropped.
  in_window <- weekly$PeriodStartDate >= "2013" &
    weekly$PeriodStartDate <= "2017"
  weekly <- drop_na(weekly[in_window, ])

  # Sum the counts within each state and year.
  yearly <- aggregate(
    weekly$CountValue,
    by = list(weekly$Admin1Name, weekly$Admin1ISO, weekly$PeriodStartDate),
    FUN = sum
  )
  setNames(yearly, c("StateName", "state", "Year", "Cases"))
})

For West Nile Virus Data set

# Reduce the raw West Nile virus records to yearly case totals per state,
# 2013-2017.
WestNileDF <- local({
  # Only the state name, state ISO code, period start and case count are used.
  weekly <- WestNileDF[c("Admin1Name", "Admin1ISO", "PeriodStartDate", "CountValue")]

  # The first four characters of the start date are the year.
  weekly$PeriodStartDate <- strtrim(weekly$PeriodStartDate, 4)
  # "US-OH" -> "OH"
  weekly$Admin1ISO <- gsub("US-", "", as.character(weekly$Admin1ISO))

  # Years are compared as text; then rows with any missing value are dropped.
  in_window <- weekly$PeriodStartDate >= "2013" &
    weekly$PeriodStartDate <= "2017"
  weekly <- drop_na(weekly[in_window, ])

  # Sum the counts within each state and year.
  yearly <- aggregate(
    weekly$CountValue,
    by = list(weekly$Admin1Name, weekly$Admin1ISO, weekly$PeriodStartDate),
    FUN = sum
  )
  setNames(yearly, c("StateName", "state", "Year", "Cases"))
})

Removing states and territories that are excluded from the analysis

# Jurisdictions dropped from every disease table before plotting.
# "DISTRICT OF COLUMBIA" used to be misspelled "DISTRICE OF COLUMBIA" here, so
# that filter never matched anything and the District was silently kept.
# NOTE(review): the list mixes territories (Guam, Puerto Rico) with the
# District and two states; why Wyoming and South Dakota are excluded is not
# visible in this file -- confirm it is intentional.
excluded_states <- c(
  "WYOMING", "GUAM", "DISTRICT OF COLUMBIA", "SOUTH DAKOTA", "PUERTO RICO"
)

DengueDF <- subset(DengueDF, !(StateName %in% excluded_states))

MalariaDF <- subset(MalariaDF, !(StateName %in% excluded_states))

WestNileDF <- subset(WestNileDF, !(StateName %in% excluded_states))

For Flood Data Set

# Keep only the flooded state and the year the flood began. The columns are
# picked by name (not by position 1 and 8) so a reordered CSV cannot silently
# select the wrong fields; they are renamed to the join keys used later.
FloodData <- setNames(FloodData[c("State", "Began")], c("state", "Year"))

# Plain data frame with the year stored as text, plus a constant flag that
# marks every row as a recorded flood.
FloodDataDF <- as.data.frame(FloodData)
FloodDataDF$Year <- as.character(FloodDataDF$Year)
FloodDataDF$IsFlooded <- "Yes"

For Population Data Set

# Study years; these are also the names of the yearly population columns.
population_years <- as.character(2013:2017)

# Select by name instead of by position (1, 7:11) so a reordered CSV cannot
# silently shift the years.
populationDF <- populationDF[c("States", population_years)]

# Reshape wide -> long: one row per state and year, built year by year (all
# states for 2013, then all states for 2014, ...). The state names are
# repeated explicitly rather than relying on cbind() recycling, and Year is
# kept as text (not a factor) so it matches the text Year key of the disease
# tables it is joined to.
populationDF <- data.frame(
  State = toupper(rep(populationDF$States, times = length(population_years))),
  Population = unlist(populationDF[population_years], use.names = FALSE),
  Year = rep(population_years, each = nrow(populationDF)),
  stringsAsFactors = FALSE
)
  3. Visualizing the Data
# Total dengue cases per state, summed over all years in DengueDF. Columns are
# addressed by name and the result is renamed to the `state` / `Cases` layout
# passed to plot_usmap() below.
DengueCases <- setNames(
  aggregate(DengueDF["Cases"], by = DengueDF["StateName"], FUN = sum),
  c("state", "Cases")
)

# Choropleth of the state totals, shaded from white to blue.
plot_usmap(
  data = DengueCases, values = "Cases", color = "#0058F5", labels = TRUE
) +
  scale_fill_continuous(
    low = "white", high = "#0058F5", name = "Number of Cases",
    labels = scales::comma # spelled out; `label` only worked by partial matching
  ) +
  labs(title = "Dengue Cases over the years 2013-2017") +
  theme(panel.background = element_blank())

# Total malaria cases per state, summed over all years in MalariaDF. Columns
# are addressed by name and the result is renamed to the `state` / `Cases`
# layout passed to plot_usmap() below.
MalariaCases <- setNames(
  aggregate(MalariaDF["Cases"], by = MalariaDF["StateName"], FUN = sum),
  c("state", "Cases")
)

# Choropleth of the state totals, shaded from white to blue.
plot_usmap(
  data = MalariaCases, values = "Cases", color = "#0058F5", labels = TRUE
) +
  scale_fill_continuous(
    low = "white", high = "#0058F5", name = "Number of Cases",
    labels = scales::comma # spelled out; `label` only worked by partial matching
  ) +
  labs(title = "Malaria Cases over the years 2013-2017") +
  theme(panel.background = element_blank())

# Total West Nile virus cases per state, summed over all years in WestNileDF.
# Columns are addressed by name and the result is renamed to the `state` /
# `Cases` layout passed to plot_usmap() below.
WestNileCases <- setNames(
  aggregate(WestNileDF["Cases"], by = WestNileDF["StateName"], FUN = sum),
  c("state", "Cases")
)

# Choropleth of the state totals, shaded from white to blue.
plot_usmap(
  data = WestNileCases, values = "Cases", color = "#0058F5", labels = TRUE
) +
  scale_fill_continuous(
    low = "white", high = "#0058F5", name = "Number of Cases",
    labels = scales::comma # spelled out; `label` only worked by partial matching
  ) +
  labs(title = "West Nile Virus Cases over the years 2013-2017") +
  theme(panel.background = element_blank())

# Yearly totals for each disease, summed over all states left in the tables.
mal <- setNames(
  aggregate(MalariaDF["Cases"], by = MalariaDF["Year"], FUN = sum),
  c("Year", "Malaria")
)
den <- setNames(
  aggregate(DengueDF["Cases"], by = DengueDF["Year"], FUN = sum),
  c("Year", "Dengue")
)
wnv <- setNames(
  aggregate(WestNileDF["Cases"], by = WestNileDF["Year"], FUN = sum),
  c("Year", "WestNileVirus")
)

# Line the three series up by year. These are right joins, so the result has
# exactly the years present in the West Nile table (the last one joined); a
# disease with no total for one of those years gets NA.
MalDen <- right_join(den, mal, by = "Year")
dftest <- right_join(MalDen, wnv, by = "Year")

# Long format: one row per year and disease, with the count in `value`.
CombinedDF <- dftest %>%
  pivot_longer(cols = c(Dengue, Malaria, WestNileVirus), names_to = "Disease")

ggplot(
  data = CombinedDF,
  mapping = aes(x = Disease, y = value, color = Disease)
) +
  geom_boxplot() +
  # "Greens" is the ColorBrewer palette name; "Green" is not a palette and
  # raised an "Unknown palette" warning.
  # NOTE(review): only `color` is mapped above, so this fill scale has nothing
  # to act on -- switch to scale_color_brewer() if the palette should show.
  scale_fill_brewer(palette = "Greens") +
  geom_jitter(shape = 16, position = position_jitter(0.2)) +
  labs(
    title = "Dengue VS Malaria VS West Nile Virus",
    y = "Cases", x = "Disease"
  )
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).

# Grouped bars: one cluster per year, one bar per disease.
ggplot(CombinedDF, mapping = aes(x = Year, y = value, fill = Disease)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    x = "Year",
    y = "Cases",
    title = "Dengue VS Malaria VS West Nile Virus"
  ) +
  scale_fill_ipsum() +
  theme_ipsum() # Arial Narrow
## Warning: Removed 2 rows containing missing values (geom_bar).

# Year must be text to match FloodDataDF$Year in the join below.
DengueDF$Year <- as.character(DengueDF$Year)

# Keep every dengue state-year and attach the flood flag where a flood is
# recorded for the same state and year.
FloodVsDengueDF <- right_join(
  FloodDataDF, DengueDF,
  by = c("state" = "StateName", "Year" = "Year")
)

# Keep the four plotted columns by name (this drops the state abbreviation)
# and remove duplicate rows, e.g. from several flood records for one state
# and year.
FloodVsDengueDF <- distinct(
  FloodVsDengueDF[c("state", "Year", "IsFlooded", "Cases")]
)

# State-years with no matching flood record have no flag; mark them "No".
# Only IsFlooded is touched, so a stray NA elsewhere can never turn the
# numeric Cases column into text.
FloodVsDengueDF$IsFlooded[is.na(FloodVsDengueDF$IsFlooded)] <- "No"

# Cases per state, coloured by flood flag, one panel per year.
ggplot(
  data = FloodVsDengueDF,
  mapping = aes(x = state, y = Cases, fill = IsFlooded)
) +
  geom_col(position = "dodge") +
  theme(
    axis.text.x = element_text(angle = 45, size = 8, vjust = 1, hjust = 1)
  ) +
  labs(title = "Floods impact on Dengue disease") +
  facet_wrap(~Year, nrow = 5)

# Year must be text to match FloodDataDF$Year in the join below.
MalariaDF$Year <- as.character(MalariaDF$Year)

# Keep every malaria state-year and attach the flood flag where a flood is
# recorded for the same state and year.
FloodVsMalariaDF <- right_join(
  FloodDataDF, MalariaDF,
  by = c("state" = "StateName", "Year" = "Year")
)

# Keep the four plotted columns by name (this drops the state abbreviation)
# and remove duplicate rows, e.g. from several flood records for one state
# and year.
FloodVsMalariaDF <- distinct(
  FloodVsMalariaDF[c("state", "Year", "IsFlooded", "Cases")]
)

# State-years with no matching flood record have no flag; mark them "No".
# Only IsFlooded is touched, so a stray NA elsewhere can never turn the
# numeric Cases column into text.
FloodVsMalariaDF$IsFlooded[is.na(FloodVsMalariaDF$IsFlooded)] <- "No"

# Cases per state, coloured by flood flag, one panel per year.
ggplot(
  data = FloodVsMalariaDF,
  mapping = aes(x = state, y = Cases, fill = IsFlooded)
) +
  geom_col(position = "dodge") +
  theme(
    axis.text.x = element_text(angle = 45, size = 8, vjust = 1, hjust = 1)
  ) +
  labs(title = "Floods impact on Malaria disease") +
  facet_wrap(~Year, nrow = 5)

# Year must be text to match FloodDataDF$Year in the join below.
WestNileDF$Year <- as.character(WestNileDF$Year)

# Keep every West Nile state-year and attach the flood flag where a flood is
# recorded for the same state and year.
FloodVsWestNileDF <- right_join(
  FloodDataDF, WestNileDF,
  by = c("state" = "StateName", "Year" = "Year")
)

# Keep the four plotted columns by name (this drops the state abbreviation)
# and remove duplicate rows, e.g. from several flood records for one state
# and year.
FloodVsWestNileDF <- distinct(
  FloodVsWestNileDF[c("state", "Year", "IsFlooded", "Cases")]
)

# State-years with no matching flood record have no flag; mark them "No".
# Only IsFlooded is touched, so a stray NA elsewhere can never turn the
# numeric Cases column into text.
FloodVsWestNileDF$IsFlooded[is.na(FloodVsWestNileDF$IsFlooded)] <- "No"

# Cases per state, coloured by flood flag, one panel per year.
ggplot(
  data = FloodVsWestNileDF,
  mapping = aes(x = state, y = Cases, fill = IsFlooded)
) +
  geom_col(position = "dodge") +
  theme(
    axis.text.x = element_text(angle = 45, size = 8, vjust = 1, hjust = 1)
  ) +
  labs(title = "Floods impact on West Nile Virus disease") +
  facet_wrap(~Year, nrow = 5)

# Upper-case the join key so it matches the upper-cased populationDF$State.
# This is applied to StateName itself: DengueDF has no `State` column, so
# reading DengueDF$State only worked through `$` partial matching and writing
# to it created a stray duplicate column that then had to be dropped from the
# join result by position.
DengueDF$StateName <- toupper(DengueDF$StateName)

# Attach the population of the same state and year to each dengue total;
# rows left with a missing value (e.g. no population match) are removed.
PopAndDengueDF <- right_join(
  populationDF, DengueDF,
  by = c("State" = "StateName", "Year" = "Year")
) %>%
  na.omit()

# Bubble chart: cases per state, bubble size = population, colour = year.
PopAndDengueDF %>%
  # Largest populations first, so they are drawn first and the smaller
  # bubbles end up on top of them.
  arrange(desc(Population)) %>%
  ggplot(mapping = aes(x = State, y = Cases, size = Population, color = Year)) +
  geom_point(alpha = 0.5) +
  theme(
    axis.text.x = element_text(angle = 45, size = 8, vjust = 1, hjust = 1)
  ) +
  labs(title = "Population Vs Dengue") +
  scale_size(range = c(.1, 24), name = "Population")

# Attach the population of the same state and year to each malaria total;
# rows left with a missing value (e.g. no population match) are removed.
# The join result has five columns (State, Population, Year, state, Cases),
# so the former `[, -c(6)]` step removed nothing and has been dropped.
PopAndMalDF <- right_join(
  populationDF, MalariaDF,
  by = c("State" = "StateName", "Year" = "Year")
) %>%
  na.omit()

# Bubble chart: cases per state, bubble size = population, colour = year.
PopAndMalDF %>%
  # Largest populations first, so they are drawn first and the smaller
  # bubbles end up on top of them.
  arrange(desc(Population)) %>%
  ggplot(mapping = aes(x = State, y = Cases, size = Population, color = Year)) +
  geom_point(alpha = 0.5) +
  theme(
    axis.text.x = element_text(angle = 45, size = 8, vjust = 1, hjust = 1)
  ) +
  labs(title = "Population Vs Malaria") +
  scale_size(range = c(.1, 24), name = "Population")

# Attach the population of the same state and year to each West Nile total;
# rows left with a missing value (e.g. no population match) are removed.
# The join result has five columns (State, Population, Year, state, Cases),
# so the former `[, -c(6)]` step removed nothing and has been dropped.
PopAndWNVDF <- right_join(
  populationDF, WestNileDF,
  by = c("State" = "StateName", "Year" = "Year")
) %>%
  na.omit()

# Bubble chart: cases per state, bubble size = population, colour = year.
PopAndWNVDF %>%
  # Largest populations first, so they are drawn first and the smaller
  # bubbles end up on top of them.
  arrange(desc(Population)) %>%
  ggplot(mapping = aes(x = State, y = Cases, size = Population, color = Year)) +
  geom_point(alpha = 0.5) +
  theme(
    axis.text.x = element_text(angle = 45, size = 8, vjust = 1, hjust = 1)
  ) +
  labs(title = "Population Vs West Nile Virus") +
  # Population is plotted as raw head counts, not millions, so the legend is
  # titled "Population" like the dengue and malaria charts.
  scale_size(range = c(.1, 24), name = "Population")